#### installing packages if not installed ####

list.of.packages <- c('tidyverse',
                      'meta',
                      'ggplot2',
                      'lubridate',
                      'estimatr',
                      'ggthemes',
                      'readr',
                      'readxl',
                      'stringr',
                      'rdrobust',
                      'gridExtra')
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)


#### attaching libraries ####

suppressPackageStartupMessages(
  
  {
    
    library(tidyverse)
    library(meta)
    library(ggplot2)
    library(lubridate)
    library(estimatr)    
    library(ggthemes)
    library(readr)
    library(readxl)
    library(stringr)
    library(rdrobust)
    library(gridExtra)
    
  }
  
)

#### loading in ccc data #### 

# from may-july, they have a separate sheet for antiracism protests

ccc_datasets = list.files('data/ccc_data')
ccc_data_list = as.list(rep(NA, length(ccc_datasets)))

for (i in 1:length(ccc_datasets)) {
  
  print(paste0("I iteration ", i))
  
  ccc_data_list[[i]] = 
    read_excel(paste0("data/ccc_data/", ccc_datasets[i]), sheet = 2)
  
  ccc_data_list[[i]] = 
    ccc_data_list[[i]] %>% 
    filter(grepl(x = tolower(ccc_data_list[[i]]$Actor), "black lives") | 
             grepl(x = tolower(ccc_data_list[[i]]$Claim),
                   "black lives|against police brutality|against racism")) %>% 
    dplyr::select(CityTown, Date, EstimateLow) %>% 
    mutate(count = 1)
  
}

ccc_ar_may = read_excel("data/ccc_data/Crowd Estimates May 2020.xlsx", sheet = 3) %>% 
  dplyr::select(`City/Town`, Date, EstimateLow) %>% 
  rename(CityTown = `City/Town`) %>% 
  mutate(count = 1)
ccc_ar_jun = read_excel("data/ccc_data/Crowd Estimates June 2020.xlsx", sheet = 3) %>% 
  dplyr::select(CityTown, Date, EstimateLow) %>% 
  mutate(count = 1)
ccc_ar_jul = read_excel("data/ccc_data/Crowd Estimates July 2020.xlsx", sheet = 3) %>% 
  dplyr::select(CityTown, Date, EstimateLow) %>% 
  mutate(count = 1)

df_date = data.frame(
  date = seq(from = as.Date("2020-01-01"), to = as.Date("2021-01-01"), by = 'day')
)


ccc_df_la = ccc_data_list %>% 
  do.call(rbind.data.frame, .) %>% 
  bind_rows(., ccc_ar_may) %>% 
  bind_rows(., ccc_ar_jun) %>% 
  bind_rows(., ccc_ar_jul) %>% 
  filter(grepl(x = tolower(CityTown), "los angeles")) %>% 
  mutate(date = as.Date(Date)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-28"), 1, 0)) 

ccc_df_sea = ccc_data_list %>% 
  do.call(rbind.data.frame, .) %>% 
  bind_rows(., ccc_ar_may) %>% 
  bind_rows(., ccc_ar_jun) %>% 
  bind_rows(., ccc_ar_jul) %>% 
  filter(grepl(x = tolower(CityTown), "seattle")) %>% 
  mutate(date = as.Date(Date)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) 

ccc_df_aus = ccc_data_list %>% 
  do.call(rbind.data.frame, .) %>% 
  bind_rows(., ccc_ar_may) %>% 
  bind_rows(., ccc_ar_jun) %>% 
  bind_rows(., ccc_ar_jul) %>% 
  filter(grepl(x = tolower(CityTown), "austin")) %>% 
  mutate(date = as.Date(Date)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-30"), 1, 0))

ccc_df_phl = ccc_data_list %>% 
  do.call(rbind.data.frame, .) %>% 
  bind_rows(., ccc_ar_may) %>% 
  bind_rows(., ccc_ar_jun) %>% 
  bind_rows(., ccc_ar_jul) %>% 
  filter(grepl(x = tolower(CityTown), "philadelphia")) %>% 
  mutate(date = as.Date(Date)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-30"), 1, 0))

# plots 

ccc_df_aus_rd = ccc_df_aus %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-29"))

rdaus1 = with(ccc_df_aus_rd, rdrobust(x = blmrv,
                                      y = count, kernel = 'uni', p = 1))
rdaus2 = with(ccc_df_aus_rd, rdrobust(x = blmrv,
                                      y = EstimateLow, kernel = 'uni', p = 1))
rdauslab1 = paste0("RD Est: ", round(rdaus1$coef[1], 2), "\nSE: ",
                   round(rdaus1$se[1], 2), "\n p < 0.001")
rdauslab2 = paste0("RD Est: ", round(rdaus2$coef[1], 2), "\nSE: ",
                   round(rdaus2$se[1], 2), "\n p < 0.05")

protest_desc_plot1 = 
  ccc_df_aus %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             size = .2) + 
  geom_point(aes(x = date, y = count),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = count, group = blm),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdauslab1,
           x = as.Date("2020-03-15"),
           y = 2.5,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Protest Count",
       title = "A. Austin (BLM Protest Count)") + 
  theme_tufte()  

protest_desc_plot2 = 
  ccc_df_aus %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-28"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-05-28"),
             size = .2) + 
  geom_point(aes(x = date, y = EstimateLow),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = EstimateLow, group = blm),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdauslab2,
           x = as.Date("2020-03-15"),
           y = 1000,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Crowd Size",
       title = "B. Austin (BLM Crowd Size)") + 
  theme_tufte()  

ccc_df_la_rd = ccc_df_la %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-28"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-28"))

rdla1 = with(ccc_df_la_rd, rdrobust(x = blmrv,
                                      y = count, kernel = 'uni', p = 1))
rdla2 = with(ccc_df_la_rd, rdrobust(x = blmrv,
                                      y = EstimateLow, kernel = 'uni', p = 1))
rdlalab1 = paste0("RD Est: ", round(rdla1$coef[1], 2), "\nSE: ",
                   round(rdla1$se[1], 2), "\n p < 0.001")
rdlalab2 = paste0("RD Est: ", round(rdla2$coef[1], 2), "\nSE: ",
                   round(rdla2$se[1], 2), "\n p < 0.01")

protest_desc_plot3 = 
  ccc_df_la %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-28"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-05-28"),
             size = .2) + 
  geom_point(aes(x = date, y = count),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = count, group = blm),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdlalab1,
           x = as.Date("2020-03-15"),
           y = 8,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Protest Count",
       title = "C. Los Angeles (BLM Protest Count)") + 
  theme_tufte()  

protest_desc_plot4 = 
  ccc_df_la %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-28"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-05-28"),
             size = .2) + 
  geom_point(aes(x = date, y = EstimateLow),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = EstimateLow, group = blm),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdlalab2,
           x = as.Date("2020-03-15"),
           y = 12000,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Crowd Size",
       title = "D. Los Angeles (BLM Crowd Size)") + 
  theme_tufte()  

ccc_df_phl_rd = ccc_df_phl %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-30"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-30"))

rdphl1 = with(ccc_df_phl_rd, rdrobust(x = blmrv,
                                    y = count, kernel = 'uni', p = 1))
rdphl2 = with(ccc_df_phl_rd, rdrobust(x = blmrv,
                                    y = EstimateLow, kernel = 'uni', p = 1))
rdphllab1 = paste0("RD Est: ", round(rdphl1$coef[1], 2), "\nSE: ",
                  round(rdphl1$se[1], 2), "\n p < 0.001")
rdphllab2 = paste0("RD Est: ", round(rdphl2$coef[1], 2), "\nSE: ",
                  round(rdphl2$se[1], 2), "\n p < 0.01")

protest_desc_plot5 = 
  ccc_df_phl %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-30"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-05-30"),
             size = .2) + 
  geom_point(aes(x = date, y = count),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = count, group = blm),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdphllab1,
           x = as.Date("2020-03-15"),
           y = 5,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Protest Count",
       title = "E. Philadelphia (BLM Protest Count)") + 
  theme_tufte()  

protest_desc_plot6 = 
  ccc_df_phl %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-30"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-05-30"),
             size = .2) + 
  geom_point(aes(x = date, y = EstimateLow),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = EstimateLow, group = blm),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdphllab2,
           x = as.Date("2020-03-15"),
           y = 5000,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Crowd Size",
       title = "F. Philadelphia (BLM Crowd Size)") + 
  theme_tufte()  

ccc_df_sea_rd = 
  ccc_df_phl %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-29"))

rdsea1 = with(ccc_df_sea_rd, rdrobust(x = blmrv,
                                      y = count, kernel = 'uni', p = 1))
rdsea2 = with(ccc_df_sea_rd, rdrobust(x = blmrv,
                                      y = EstimateLow, kernel = 'uni', p = 1))
rdsealab1 = paste0("RD Est: ", round(rdsea1$coef[1], 2), "\nSE: ",
                   round(rdphl1$se[1], 2), "\n p < 0.001")
rdsealab2 = paste0("RD Est: ", round(rdsea2$coef[1], 2), "\nSE: ",
                   round(rdsea2$se[1], 2), "\n p < 0.05")

protest_desc_plot7 = 
  ccc_df_sea %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             size = .2) + 
  geom_point(aes(x = date, y = count),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = count, group = blm),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdsealab1,
           x = as.Date("2020-03-15"),
           y = 4,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Protest Count",
       title = "G. Seattle (BLM Protest Count)") + 
  theme_tufte()  

protest_desc_plot8 = 
  ccc_df_sea %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             size = .2) + 
  geom_point(aes(x = date, y = EstimateLow),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = EstimateLow, group = blm),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdsealab2,
           x = as.Date("2020-03-15"),
           y = 40000,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Crowd Size",
       title = "H. Seattle (BLM Crowd Size)") + 
  theme_tufte()  

protest_desc_plot_grob = 
  arrangeGrob(protest_desc_plot1, protest_desc_plot2,
            protest_desc_plot3, protest_desc_plot4,
            protest_desc_plot5, protest_desc_plot6,
            protest_desc_plot7, protest_desc_plot8,
            ncol = 2)

ggsave(plot = protest_desc_plot_grob, 
       filename = "pics/protest_by_city.jpeg", dpi = 600,
       width = 8, height = 7)

#### loading in stop data ####

load("data/stopdata_la.RData")

# vehicle la data 

hit_rate_veh_la = stopdat1 %>% 
  filter(reason_for_stop == "Traffic violation") %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            veh_contraband = mean(contraband_recovered, na.rm = TRUE),
            veh_contraband_alc = mean(contraband_recovered_alc, na.rm = TRUE),
            veh_contraband_nalc = mean(contraband_recovered_nalc, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            arrest_m = mean(arrest, na.rm = TRUE),
            arrest_sum = sum(arrest, na.rm = TRUE),
            arrest_felony_m = mean(arrest_felony, na.rm = TRUE),
            arrest_infraction_m = mean(arrest_infraction, na.rm = TRUE),
            arrest_misdemeanor_m = mean(arrest_misdemeanor, na.rm = TRUE),
            arrest_felony_s = sum(arrest_felony, na.rm = TRUE),
            arrest_infraction_s = sum(arrest_infraction, na.rm = TRUE),
            arrest_misdemeanor_s = sum(arrest_misdemeanor, na.rm = TRUE)) %>% 
  rename(arrest = arrest_m)

hit_rate_veh_la$blmrv = 
  as.numeric(hit_rate_veh_la$date - 
               min(hit_rate_veh_la$date[hit_rate_veh_la$blm == 1]))

ccc_df_la_ts = 
  ccc_df_la %>% 
  group_by(date) %>% 
  summarize(count_protest = sum(count, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count_protest = ifelse(is.na(count_protest), 0, count_protest)) %>% 
  mutate(crowd_size = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(lcrowd_size = log(crowd_size + 1))

hit_rate_veh_la_ccc = merge(hit_rate_veh_la, ccc_df_la_ts, by = 'date')
hit_rate_veh_la_ccc = 
  hit_rate_veh_la_ccc %>%
  filter(date >= as.Date("2020-01-01") & date <= as.Date("2020-12-31"))

hit_rate_veh_la_ccc$quarter_fe = 
  ifelse(as.numeric(substring(hit_rate_veh_la_ccc$date, 6, 7)) <= 3, 1,
         ifelse(as.numeric(substring(hit_rate_veh_la_ccc$date, 6, 7)) > 3 & 
                  as.numeric(substring(hit_rate_veh_la_ccc$date, 6, 7)) <= 6, 2,
                ifelse(as.numeric(substring(hit_rate_veh_la_ccc$date, 6, 7)) > 7 & 
                         as.numeric(substring(hit_rate_veh_la_ccc$date, 6, 7)) <= 9, 3, 4)))

# vehicle austin data 

load(file = "data/rpf.RData")

rpf_ts = rpf %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE))

rpf_ts$blmrv = 
  rpf_ts$date - min(rpf_ts$date[rpf_ts$blm == 1])

rpf_ts$count_std = 
  (rpf_ts$count - mean(rpf_ts$count, na.rm = TRUE)) / 
  sd(rpf_ts$count, na.rm = TRUE)

rpf_ts = rpf_ts %>% filter(date >= as.Date("2020-01-01"))
rpf_ts = ccc_df_aus %>% 
  group_by(date) %>% 
  summarize(count_protest = sum(count, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count_protest = ifelse(is.na(count_protest), 0, count_protest)) %>% 
  mutate(crowd_size = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(lcrowd_size = log(crowd_size + 1)) %>% 
  merge(., rpf_ts, by = "date")

rpf_ts$quarter_fe = 
  ifelse(as.numeric(substring(rpf_ts$date, 6, 7)) <= 3, 1,
         ifelse(as.numeric(substring(rpf_ts$date, 6, 7)) > 3 & 
                  as.numeric(substring(rpf_ts$date, 6, 7)) <= 6, 2,
                ifelse(as.numeric(substring(rpf_ts$date, 6, 7)) > 7 & 
                         as.numeric(substring(rpf_ts$date, 6, 7)) <= 9, 3, 4)))

rpf_ts = rpf_ts %>% 
  filter(date >= as.Date("2020-01-01") & date <= as.Date("2020-12-31"))

# philly data 

load(file = "data/ph_stops_veh.RData")
stops_ts = stops_veh %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE)) 

stops_ts$blmrv = 
  stops_ts$date - min(stops_ts$date[stops_ts$blm == 1])

stops_ts = 
  stops_ts %>% filter(date >= as.Date("2018-01-01"))

stops_ts$count_std = 
  (stops_ts$count - mean(stops_ts$count, na.rm = TRUE)) / 
  sd(stops_ts$count, na.rm = TRUE)

stops_ts = 
  stops_ts %>% 
  filter(date >= as.Date("2020-01-01") & date <= as.Date("2020-12-31"))

stops_ts = ccc_df_phl %>% 
  group_by(date) %>% 
  summarize(count_protest = sum(count, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count_protest = ifelse(is.na(count_protest), 0, count_protest)) %>% 
  mutate(crowd_size = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(lcrowd_size = log(crowd_size + 1)) %>% 
  merge(., stops_ts, by = "date")

stops_ts$quarter_fe = 
  ifelse(as.numeric(substring(stops_ts$date, 6, 7)) <= 3, 1,
       ifelse(as.numeric(substring(stops_ts$date, 6, 7)) > 3 & 
                as.numeric(substring(stops_ts$date, 6, 7)) <= 6, 2,
              ifelse(as.numeric(substring(stops_ts$date, 6, 7)) > 7 & 
                       as.numeric(substring(stops_ts$date, 6, 7)) <= 9, 3, 4)))


# seattle data

load("data/terry.RData")

terry = terry %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(initiate_off = ifelse(call_type == "-" | call_type == "ONVIEW" | 
                                 call_type == "SCHEDULED EVENT (RECURRING)" | 
                                 call_type == "ALARM CALL (NOT POLICE ALARM)", 1, 0),
         initiate_civ = 
           ifelse(call_type == "911" | 
                    call_type == "TELEPHONE OTHER, NOT 911" | 
                    call_type == "TEXT MESSAGE", 1, 0))

terry_desc = terry %>% 
  filter(initiate_off == 1) %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm)) 

terry_desc$blmrv = 
  terry_desc$date - min(terry_desc$date[terry_desc$blm == 1])

terry_desc$count_std = 
  (terry_desc$count - mean(terry_desc$count, na.rm = TRUE)) / 
  sd(terry_desc$count, na.rm = TRUE)


terry_desc = terry_desc %>% 
  filter(date >= as.Date("2020-01-01") & date <= as.Date("2020-12-31"))

terry_desc = ccc_df_sea %>% 
  group_by(date) %>% 
  summarize(count_protest = sum(count, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count_protest = ifelse(is.na(count_protest), 0, count_protest)) %>% 
  mutate(crowd_size = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(lcrowd_size = log(crowd_size + 1)) %>% 
  merge(., terry_desc, by = "date")

terry_desc$quarter_fe = 
  ifelse(as.numeric(substring(terry_desc$date, 6, 7)) <= 3, 1,
         ifelse(as.numeric(substring(terry_desc$date, 6, 7)) > 3 & 
                  as.numeric(substring(terry_desc$date, 6, 7)) <= 6, 2,
                ifelse(as.numeric(substring(terry_desc$date, 6, 7)) > 7 & 
                         as.numeric(substring(terry_desc$date, 6, 7)) <= 9, 3, 4)))

#### analyzing effect of blm protest net of protests #### 

# loading in covid datasets

covdf1 = read_excel("data/balcovs/covid/austin/Texas COVID-19 New Confirmed Cases by County.xlsx")
covdf2 = read_csv("data/balcovs/covid/los_angeles/LA_County_Covid19_cases_deaths_date_table.csv")
covdf3 = read_csv("data/balcovs/covid/philadelphia/covid_cases_by_date.csv")
covdf4 = read_excel("data/balcovs/covid/seattle_king_cty/COVID19_King-County.xlsx")

covdf1 = 
  read_excel("data/balcovs/covid/austin/Texas COVID-19 New Confirmed Cases by County.xlsx",
             sheet = 1) %>% 
  slice(2:(nrow(.)-4))

colnames(covdf1) = as.character(covdf1[1, ])
# as.Date('2020-03-06')
covdf1 = 
  covdf1 %>% 
  filter(County == 'Travis') %>% 
  t %>% 
  data.frame()

covdf1 = covdf1[2:(nrow(covdf1)-2), ]
covdf1 = 
  data.frame(
    cases = covdf1,
    date = seq.Date(from = as.Date("2020-03-06"), to = as.Date("2020-03-06")+300, by = 'day')
  ) 

covdf1b = 
  read_excel("data/balcovs/covid/austin/Texas COVID-19 New Confirmed Cases by County.xlsx",
             sheet = 2) %>% 
  slice(2:(nrow(.)-4))

colnames(covdf1b) = as.character(covdf1b[1, ])
# as.Date('2020-03-06')
covdf1b = 
  covdf1b %>% 
  filter(County == 'Travis') %>% 
  t %>% 
  data.frame()
covdf1b = covdf1b[2:(nrow(covdf1b)-2), ]

covdf1b = 
  data.frame(
    cases = covdf1b,
    date = seq.Date(from = as.Date("2021-01-01"), to = as.Date("2021-12-31"), by = 'day')
  ) 

covdf1 = 
  bind_rows(covdf1, covdf1b) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-29"))

covdf1$cases = as.numeric(covdf1$cases)

# austin

rpf_ts = 
  covdf1 %>% dplyr::select(date, cases) %>% 
  rename(covid_cases = cases) %>% 
  merge(rpf_ts, ., by = 'date', all.x = TRUE) %>% 
  mutate(covid_cases = ifelse(is.na(covid_cases), 0, covid_cases)) %>% 
  mutate(covid_cases = log(covid_cases + 1))

ausm1 = with(rpf_ts, lm_robust(count ~ blm * blmrv))
ausm2 = with(rpf_ts, lm_robust(count ~ blm * blmrv + count_protest + covid_cases))
ausm3 = with(rpf_ts, lm_robust(count ~ blm * blmrv + lcrowd_size + covid_cases))
ausm4 = with(rpf_ts, lm_robust(count ~ blm + count_protest + covid_cases, fixed_effects = ~ quarter_fe))
ausm5 = with(rpf_ts, lm_robust(count ~ blm + lcrowd_size + covid_cases, fixed_effects = ~ quarter_fe))

texreg::texreg(l = list(ausm1, ausm2, ausm3, ausm4, ausm5),
               booktabs = TRUE,
               include.ci = FALSE,
               include.rmse = FALSE,
               include.adjrs = FALSE,
               caption = "\\textbf{The onset of the BLM protest shifts police behavior net of the intensity of protest activity (Austin)}",
               custom.model.names = paste0("(", seq(from = 1, to = 5), ")"),
               caption.above = TRUE,
               custom.coef.map = list("blm" = "BLM Protest",
                                      "count_protest" = "Protest Count",
                                      "lcrowd_size" = "Log(Crowd Size + 1)",
                                      'covid_cases' = "Log(COVID Cases + 1)"),
               custom.gof.rows = list('RD Effect?' = c("Y", "Y", "Y", "N", "N"),
                                      'DIM Effect?' = c("N", "N", "N", "Y", "Y"),
                                      "Quarter FE" = c("N", "N", "N", "Y", "Y")),
               label = "table:pintaus")

# los angeles

la_covid = 
  read_csv("data/balcovs/covid/los_angeles/LA_County_Covid19_cases_deaths_date_table.csv") 

la_covid$date = as.Date(la_covid$date_use)

la_covid = 
  la_covid %>% 
  dplyr::select(date, new_case) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-28"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-28"))

hit_rate_veh_la_ccc = 
  la_covid %>% 
  rename(covid_cases = new_case) %>% 
  dplyr::select(date, covid_cases) %>% 
  merge(., hit_rate_veh_la_ccc, by = 'date', all.y = TRUE) %>% 
  mutate(covid_cases = ifelse(is.na(covid_cases), 0, covid_cases)) %>% 
  mutate(covid_cases = log(covid_cases + 1)) 


lam1 = with(hit_rate_veh_la_ccc, lm_robust(count ~ blm * blmrv))
lam2 = with(hit_rate_veh_la_ccc, lm_robust(count ~ blm * blmrv + count_protest + covid_cases))
lam3 = with(hit_rate_veh_la_ccc, lm_robust(count ~ blm * blmrv + lcrowd_size + covid_cases))
lam4 = with(hit_rate_veh_la_ccc, lm_robust(count ~ blm + count_protest + covid_cases, fixed_effects = ~ quarter_fe))
lam5 = with(hit_rate_veh_la_ccc, lm_robust(count ~ blm + lcrowd_size + covid_cases, fixed_effects = ~ quarter_fe))

texreg::texreg(l = list(lam1, lam2, lam3, lam4, lam5),
               float.pos = "!htbp",
               booktabs = TRUE,
               include.ci = FALSE,
               include.rmse = FALSE,
               include.adjrs = FALSE,
               caption = "\\textbf{The onset of the BLM protest shifts police behavior net of the intensity of protest activity (Los Angeles)}",
               custom.model.names = paste0("(", seq(from = 1, to = 5), ")"),
               caption.above = TRUE,
               custom.coef.map = list("blm" = "BLM Protest",
                                      "count_protest" = "Protest Count",
                                      "lcrowd_size" = "Log(Crowd Size + 1)",
                                      'covid_cases' = "Log(COVID Cases + 1)"),
               custom.gof.rows = list('RD Effect?' = c("Y", "Y", "Y", "N", "N"),
                                      'DIM Effect?' = c("N", "N", "N", "Y", "Y"),
                                      "Quarter FE" = c("N", "N", "N", "Y", "Y")),
               label = "table:pintla")

# philly 

phl_covid = read_csv("data/balcovs/covid/philadelphia/covid_cases_by_date.csv")
phl_covid$date = as.Date(substring(phl_covid$collection_date, 1, 10))

phl_covid = 
  phl_covid %>% 
  filter(date <= as.Date("2024-01-01")) %>% 
  filter(test_result == "positive") %>% 
  dplyr::select(date, count) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-28"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-28"))

stops_ts = 
  phl_covid %>% 
  dplyr::select(date, count) %>% 
  rename(covid_cases = count) %>% 
  merge(., stops_ts, by = 'date', all.y = TRUE) %>% 
  mutate(covid_cases = ifelse(is.na(covid_cases), 0, covid_cases)) %>% 
  mutate(covid_cases = log(covid_cases + 1))

phlm1 = with(stops_ts, lm_robust(count ~ blm * blmrv))
phlm2 = with(stops_ts, lm_robust(count ~ blm * blmrv + count_protest + covid_cases))
phlm3 = with(stops_ts, lm_robust(count ~ blm * blmrv + lcrowd_size + covid_cases))
phlm4 = with(stops_ts, lm_robust(count ~ blm + count_protest + covid_cases, fixed_effects = ~ quarter_fe))
phlm5 = with(stops_ts, lm_robust(count ~ blm + lcrowd_size + covid_cases, fixed_effects = ~ quarter_fe))

texreg::texreg(l = list(phlm1, phlm2, phlm3, phlm4, phlm5),
               float.pos = "!htbp",
               booktabs = TRUE,
               include.ci = FALSE,
               include.rmse = FALSE,
               include.adjrs = FALSE,
               caption = "\\textbf{The onset of the BLM protest shifts police behavior net of the intensity of protest activity (Philadelphia)}",
               custom.model.names = paste0("(", seq(from = 1, to = 5), ")"),
               caption.above = TRUE,
               custom.coef.map = list("blm" = "BLM Protest",
                                      "count_protest" = "Protest Count",
                                      "lcrowd_size" = "Log(Crowd Size + 1)",
                                      'covid_cases' = "Log(COVID Cases + 1)"),
               custom.gof.rows = list('RD Effect?' = c("Y", "Y", "Y", "N", "N"),
                                      'DIM Effect?' = c("N", "N", "N", "Y", "Y"),
                                      "Quarter FE" = c("N", "N", "N", "Y", "Y")),
               label = "table:pintphl")

# seattle

sea_covid = 
  read_excel("data/balcovs/covid/seattle_king_cty/COVID19_King-County.xlsx",
             sheet = 6)

sea_covid$date = as.Date(sea_covid$date)

sea_covid = 
  sea_covid %>% dplyr::select(date, case_count) %>% 
  filter(date <= as.Date("2023-01-01")) %>% 
  dplyr::select(date, case_count) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-29"))

terry_desc = sea_covid %>% 
  dplyr::select(date, case_count) %>% 
  rename(covid_cases = case_count) %>% 
  merge(., terry_desc, by = 'date', all.y = TRUE) %>% 
  mutate(covid_cases = ifelse(is.na(covid_cases), 0, covid_cases)) %>% 
  mutate(covid_cases = log(covid_cases + 1))

seam1 = with(terry_desc, lm_robust(count ~ blm * blmrv))
seam2 = with(terry_desc, lm_robust(count ~ blm * blmrv + count_protest + covid_cases))
seam3 = with(terry_desc, lm_robust(count ~ blm * blmrv + lcrowd_size + covid_cases))
seam4 = with(terry_desc, lm_robust(count ~ blm + count_protest + covid_cases, fixed_effects = ~ quarter_fe))
seam5 = with(terry_desc, lm_robust(count ~ blm + lcrowd_size + covid_cases, fixed_effects = ~ quarter_fe))

texreg::texreg(l = list(seam1, seam2, seam3, seam4, seam5),
               booktabs = TRUE,
               include.ci = FALSE,
               include.rmse = FALSE,
               include.adjrs = FALSE,
               caption = "\\textbf{The onset of the BLM protest shifts police behavior net of the intensity of protest activity (Seattle)}",
               custom.model.names = paste0("(", seq(from = 1, to = 5), ")"),
               caption.above = TRUE,
               custom.coef.map = list("blm" = "BLM Protest",
                                      "count_protest" = "Protest Count",
                                      "lcrowd_size" = "Log(Crowd Size + 1)",
                                      'covid_cases' = "Log(COVID Cases + 1)"),
               custom.gof.rows = list('RD Effect?' = c("Y", "Y", "Y", "N", "N"),
                                      'DIM Effect?' = c("N", "N", "N", "Y", "Y"),
                                      "Quarter FE" = c("N", "N", "N", "Y", "Y")),
               label = "table:pintphl")

#### relationship between protest intensity and arrest rates across the 4 cities #### 

# austin

load(file = "data/rpf.RData")

hit_rate_traffic_aus = rpf %>% 
  mutate(count = 1) %>% 
  mutate(hit = search_found,
         hit_alc = search_found_alc) %>% 
  mutate(arrest_sum = arrest) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            hit = mean(search_found, na.rm = TRUE),
            hit_alc = mean(hit_alc, na.rm = TRUE),
            arrest = mean(arrest, na.rm = TRUE),
            arrest_sum = sum(arrest_sum, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE))

hit_rate_traffic_aus$blmrv = 
  hit_rate_traffic_aus$date - 
  min(hit_rate_traffic_aus$date[hit_rate_traffic_aus$blm == 1])

hit_rate_traffic_aus = 
  ccc_df_aus %>% 
  group_by(date) %>% 
  summarize(count_protest = sum(count, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count_protest = ifelse(is.na(count_protest), 0, count_protest)) %>% 
  mutate(crowd_size = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(lcrowd_size = log(crowd_size + 1)) %>% 
  merge(., hit_rate_traffic_aus, by = "date")

hit_rate_traffic_aus$quarter_fe = 
  ifelse(as.numeric(substring(hit_rate_traffic_aus$date, 6, 7)) <= 3, 1,
         ifelse(as.numeric(substring(hit_rate_traffic_aus$date, 6, 7)) > 3 & 
                  as.numeric(substring(hit_rate_traffic_aus$date, 6, 7)) <= 6, 2,
                ifelse(as.numeric(substring(hit_rate_traffic_aus$date, 6, 7)) > 7 & 
                         as.numeric(substring(hit_rate_traffic_aus$date, 6, 7)) <= 9, 3, 4)))

hit_rate_traffic_aus = 
  covdf1 %>% 
  dplyr::select(date, cases) %>% 
  rename(covid_cases = cases) %>% 
  merge(., hit_rate_traffic_aus, by = 'date', all.y = TRUE) %>% 
  mutate(covid_cases = ifelse(is.na(covid_cases), 0, covid_cases)) %>% 
  mutate(covid_cases = log(covid_cases + 1))

ausmar1 = with(hit_rate_traffic_aus, lm_robust(arrest ~ blm * blmrv))
ausmar2 = with(hit_rate_traffic_aus, lm_robust(arrest ~ blm * blmrv + count_protest + covid_cases))
ausmar3 = with(hit_rate_traffic_aus, lm_robust(arrest ~ blm * blmrv + lcrowd_size + covid_cases))
ausmar4 = with(hit_rate_traffic_aus, lm_robust(arrest ~ blm + count_protest + covid_cases, fixed_effects = ~ quarter_fe))
ausmar5 = with(hit_rate_traffic_aus, lm_robust(arrest ~ blm + lcrowd_size + covid_cases, fixed_effects = ~ quarter_fe))

texreg::texreg(l = list(ausmar1, ausmar2, ausmar3, ausmar4, ausmar5),
               booktabs = TRUE,
               include.ci = FALSE,
               include.rmse = FALSE,
               include.adjrs = FALSE,
               caption = "\\textbf{The onset of the BLM protest increases the arrest rate, not the intensity of protest activity (Austin)}",
               custom.model.names = paste0("(", seq(from = 1, to = 5), ")"),
               caption.above = TRUE,
               custom.coef.map = list("blm" = "BLM Protest",
                                      "count_protest" = "Protest Count",
                                      "lcrowd_size" = "Log(Crowd Size + 1)",
                                      'covid_cases' = 'Log(COVID Cases + 1)'),
               custom.gof.rows = list('RD Effect?' = c("Y", "Y", "Y", "N", "N"),
                                      'DIM Effect?' = c("N", "N", "N", "Y", "Y"),
                                      "Quarter FE" = c("N", "N", "N", "Y", "Y")),
               label = "table:pintausarr",
               float.pos = "!htbp",
               stars = c(.1, .05, .01, .001),
               symbol = "\\dagger",
               scalebox = .68)

# los angeles

load("data/stopdata_la.RData")

# vehicle hit rate

hit_rate_veh_la = stopdat1 %>% 
  filter(reason_for_stop == "Traffic violation") %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            veh_contraband = mean(contraband_recovered, na.rm = TRUE),
            veh_contraband_alc = mean(contraband_recovered_alc, na.rm = TRUE),
            veh_contraband_nalc = mean(contraband_recovered_nalc, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            arrest_m = mean(arrest, na.rm = TRUE),
            arrest_sum = sum(arrest, na.rm = TRUE),
            arrest_felony_m = mean(arrest_felony, na.rm = TRUE),
            arrest_infraction_m = mean(arrest_infraction, na.rm = TRUE),
            arrest_misdemeanor_m = mean(arrest_misdemeanor, na.rm = TRUE),
            arrest_felony_s = sum(arrest_felony, na.rm = TRUE),
            arrest_infraction_s = sum(arrest_infraction, na.rm = TRUE),
            arrest_misdemeanor_s = sum(arrest_misdemeanor, na.rm = TRUE)) %>% 
  rename(arrest = arrest_m)

hit_rate_veh_la$blmrv = 
  hit_rate_veh_la$date - min(hit_rate_veh_la$date[hit_rate_veh_la$blm == 1])

hit_rate_veh_la = 
  ccc_df_la %>% 
  group_by(date) %>% 
  summarize(count_protest = sum(count, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count_protest = ifelse(is.na(count_protest), 0, count_protest)) %>% 
  mutate(crowd_size = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(lcrowd_size = log(crowd_size + 1)) %>% 
  merge(., hit_rate_veh_la, by = "date")

hit_rate_veh_la$quarter_fe = 
  ifelse(as.numeric(substring(hit_rate_veh_la$date, 6, 7)) <= 3, 1,
         ifelse(as.numeric(substring(hit_rate_veh_la$date, 6, 7)) > 3 & 
                  as.numeric(substring(hit_rate_veh_la$date, 6, 7)) <= 6, 2,
                ifelse(as.numeric(substring(hit_rate_veh_la$date, 6, 7)) > 7 & 
                         as.numeric(substring(hit_rate_veh_la$date, 6, 7)) <= 9, 3, 4)))

hit_rate_veh_la = 
  la_covid %>% 
  rename(covid_cases = new_case) %>% 
  dplyr::select(date, covid_cases) %>% 
  merge(hit_rate_veh_la, ., by = "date", all.y = TRUE) %>% 
  mutate(covid_cases = ifelse(is.na(covid_cases), 0, covid_cases)) %>% 
  mutate(covid_cases = log(covid_cases + 1))

lamar1 = with(hit_rate_veh_la, lm_robust(arrest ~ blm * blmrv))
lamar2 = with(hit_rate_veh_la, lm_robust(arrest ~ blm * blmrv + count_protest + covid_cases))
lamar3 = with(hit_rate_veh_la, lm_robust(arrest ~ blm * blmrv + lcrowd_size + covid_cases))
lamar4 = with(hit_rate_veh_la, lm_robust(arrest ~ blm + count_protest + covid_cases, fixed_effects = ~ quarter_fe))
lamar5 = with(hit_rate_veh_la, lm_robust(arrest ~ blm + lcrowd_size + covid_cases, fixed_effects = ~ quarter_fe))

texreg::texreg(l = list(lamar1, lamar2, lamar3, lamar4, lamar5),
               booktabs = TRUE,
               include.ci = FALSE,
               include.rmse = FALSE,
               include.adjrs = FALSE,
               caption = "\\textbf{The onset of the BLM protest increases the arrest rate, not the intensity of protest activity (Los Angeles)}",
               custom.model.names = paste0("(", seq(from = 1, to = 5), ")"),
               caption.above = TRUE,
               custom.coef.map = list("blm" = "BLM Protest",
                                      "count_protest" = "Protest Count",
                                      "lcrowd_size" = "Log(Crowd Size + 1)",
                                      'covid_cases' = 'Log(COVID Cases + 1)'),
               custom.gof.rows = list('RD Effect?' = c("Y", "Y", "Y", "N", "N"),
                                      'DIM Effect?' = c("N", "N", "N", "Y", "Y"),
                                      "Quarter FE" = c("N", "N", "N", "Y", "Y")),
               label = "table:pintlaarr",
               float.pos = "!htbp",
               stars = c(.1, .05, .01, .001),
               symbol = "\\dagger",
               scalebox = .68)

# philadelphia 

load(file = "data/ph_stops_veh.RData")

# hit rates (vehicle)

hit_rate_veh_phl = stops_veh %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            veh_contraband = mean(veh_contraband, na.rm = TRUE),
            arrest = mean(individual_arrested, na.rm = TRUE),
            arrest_sum = sum(individual_arrested, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE)) 

hit_rate_veh_phl$blmrv = 
  hit_rate_veh_phl$date - min(hit_rate_veh_phl$date[hit_rate_veh_phl$blm == 1])

hit_rate_veh_phl = 
  ccc_df_phl %>% 
  group_by(date) %>% 
  summarize(count_protest = sum(count, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count_protest = ifelse(is.na(count_protest), 0, count_protest)) %>% 
  mutate(crowd_size = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(lcrowd_size = log(crowd_size + 1)) %>% 
  merge(., hit_rate_veh_phl, by = "date")

hit_rate_veh_phl$quarter_fe = 
  ifelse(as.numeric(substring(hit_rate_veh_phl$date, 6, 7)) <= 3, 1,
       ifelse(as.numeric(substring(hit_rate_veh_phl$date, 6, 7)) > 3 & 
                as.numeric(substring(hit_rate_veh_phl$date, 6, 7)) <= 6, 2,
              ifelse(as.numeric(substring(hit_rate_veh_phl$date, 6, 7)) > 7 & 
                       as.numeric(substring(hit_rate_veh_phl$date, 6, 7)) <= 9, 3, 4)))

hit_rate_veh_phl = 
  phl_covid %>% 
  dplyr::select(date, count) %>% 
  rename(covid_cases = count) %>% 
  merge(hit_rate_veh_phl, ., by = 'date', all.x = TRUE) %>% 
  mutate(covid_cases = ifelse(is.na(covid_cases), 0, covid_cases)) %>% 
  mutate(covid_cases = log(covid_cases + 1))

phlmar1 = 
  with(hit_rate_veh_phl, lm_robust(arrest ~ blm * blmrv))
phlmar2 = 
  with(hit_rate_veh_phl, lm_robust(arrest ~ blm * blmrv + count_protest + covid_cases))
phlmar3 = 
  with(hit_rate_veh_phl, lm_robust(arrest ~ blm * blmrv + lcrowd_size + covid_cases))
phlmar4 = 
  with(hit_rate_veh_phl, lm_robust(arrest ~ blm + count_protest + covid_cases, fixed_effects = ~ quarter_fe))
phlmar5 = 
  with(hit_rate_veh_phl, lm_robust(arrest ~ blm + lcrowd_size + covid_cases, fixed_effects = ~ quarter_fe))

texreg::texreg(l = list(phlmar1, phlmar2, phlmar3, phlmar4, phlmar5),
               booktabs = TRUE,
               include.ci = FALSE,
               include.rmse = FALSE,
               include.adjrs = FALSE,
               caption = "\\textbf{The onset of the BLM protest increases the arrest rate, not the intensity of protest activity (Philadelphia)}",
               custom.model.names = paste0("(", seq(from = 1, to = 5), ")"),
               caption.above = TRUE,
               custom.coef.map = list("blm" = "BLM Protest",
                                      "count_protest" = "Protest Count",
                                      "lcrowd_size" = "Log(Crowd Size + 1)",
                                      'covid_cases' = 'Log(COVID Cases + 1)'),
               custom.gof.rows = list('RD Effect?' = c("Y", "Y", "Y", "N", "N"),
                                      'DIM Effect?' = c("N", "N", "N", "Y", "Y"),
                                      "Quarter FE" = c("N", "N", "N", "Y", "Y")),
               label = "table:pintphlarr",
               float.pos = "!htbp",
               stars = c(.1, .05, .01, .001),
               symbol = "\\dagger",
               scalebox = .68)

# seattle 


load("data/terry.RData")

terry = terry %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(initiate_off = ifelse(call_type == "-" | call_type == "ONVIEW" | 
                                 call_type == "SCHEDULED EVENT (RECURRING)" | 
                                 call_type == "ALARM CALL (NOT POLICE ALARM)", 1, 0),
         initiate_civ = 
           ifelse(call_type == "911" | 
                    call_type == "TELEPHONE OTHER, NOT 911" | 
                    call_type == "TEXT MESSAGE", 1, 0))

hit_rate_sea = terry %>% 
  mutate(count = 1) %>% 
  filter(initiate_off == 1) %>% 
  group_by(date) %>% 
  mutate(hit = ifelse(stop_resolution == "Arrest" | 
                        stop_resolution == "Citation / Infraction" | 
                        stop_resolution == "Offense Report" | 
                        stop_resolution == "Referred for Prosecution", 1, 0)) %>% 
  summarize(hit = mean(hit, na.rm = TRUE),
            count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            arrest_m = mean(arrest, na.rm = TRUE),
            arrest_sum = sum(arrest, na.rm = TRUE)) %>% 
  rename(arrest = arrest_m)

hit_rate_sea$blmrv = 
  hit_rate_sea$date - min(hit_rate_sea$date[hit_rate_sea$blm == 1])

hit_rate_sea = 
  ccc_df_sea %>% 
  group_by(date) %>% 
  summarize(count_protest = sum(count, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count_protest = ifelse(is.na(count_protest), 0, count_protest)) %>% 
  mutate(crowd_size = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(lcrowd_size = log(crowd_size + 1)) %>% 
  merge(., hit_rate_sea, by = "date")

hit_rate_sea$quarter_fe = 
  ifelse(as.numeric(substring(hit_rate_sea$date, 6, 7)) <= 3, 1,
         ifelse(as.numeric(substring(hit_rate_sea$date, 6, 7)) > 3 & 
                  as.numeric(substring(hit_rate_sea$date, 6, 7)) <= 6, 2,
                ifelse(as.numeric(substring(hit_rate_sea$date, 6, 7)) > 7 & 
                         as.numeric(substring(hit_rate_sea$date, 6, 7)) <= 9, 3, 4)))

hit_rate_sea = sea_covid %>% 
  dplyr::select(date, case_count) %>% 
  rename(covid_cases = case_count) %>% 
  merge(hit_rate_sea, ., by = 'date', all.y = TRUE) %>% 
  mutate(covid_cases = ifelse(is.na(covid_cases), 0, covid_cases)) %>% 
  mutate(covid_cases = log(covid_cases + 1))

seamar1 = with(hit_rate_sea, lm_robust(arrest ~ blm * blmrv))
seamar2 = with(hit_rate_sea, lm_robust(arrest ~ blm * blmrv + count_protest + covid_cases))
seamar3 = with(hit_rate_sea, lm_robust(arrest ~ blm * blmrv + lcrowd_size + covid_cases))
seamar4 = with(hit_rate_sea, lm_robust(arrest ~ blm + count_protest + covid_cases, fixed_effects = ~ quarter_fe))
seamar5 = with(hit_rate_sea, lm_robust(arrest ~ blm + lcrowd_size + covid_cases, fixed_effects = ~ quarter_fe))

texreg::texreg(l = list(seamar1, seamar2, seamar3, seamar4, seamar5),
               booktabs = TRUE,
               include.ci = FALSE,
               include.rmse = FALSE,
               include.adjrs = FALSE,
               caption = "\\textbf{The onset of the BLM protest increases the arrest rate, not the intensity of protest activity (Philadelphia)}",
               custom.model.names = paste0("(", seq(from = 1, to = 5), ")"),
               caption.above = TRUE,
               custom.coef.map = list("blm" = "BLM Protest",
                                      "count_protest" = "Protest Count",
                                      "lcrowd_size" = "Log(Crowd Size + 1)",
                                      'covid_cases' = 'Log(COVID Cases + 1)'),
               custom.gof.rows = list('RD Effect?' = c("Y", "Y", "Y", "N", "N"),
                                      'DIM Effect?' = c("N", "N", "N", "Y", "Y"),
                                      "Quarter FE" = c("N", "N", "N", "Y", "Y")),
               label = "table:pintseaarr",
               float.pos = "!htbp",
               stars = c(.1, .05, .01, .001),
               symbol = "\\dagger",
               scalebox = .68)

#### jacob blake effect #### 

ccc_df_aus_rd = 
  ccc_df_aus_rd %>% 
  mutate(blake = ifelse(date >= as.Date('2020-08-23'), 1, 0)) %>% 
  mutate(blakerv = date - as.Date("2020-08-23"))

rdaus1 = with(ccc_df_aus_rd, rdrobust(x = blakerv,
                                      y = count, kernel = 'uni', p = 1))
rdaus2 = with(ccc_df_aus_rd, rdrobust(x = blakerv,
                                      y = EstimateLow, kernel = 'uni', p = 1))
rdauslab1 = paste0("RD Est: ", round(rdaus1$coef[1], 2), "\nSE: ",
                   round(rdaus1$se[1], 2),
                   "\n p = ", round(rdaus1$pv[1], 2))
rdauslab2 = paste0("RD Est: ", round(rdaus2$coef[1], 2), "\nSE: ",
                   round(rdaus2$se[1], 2),
                   "\n p = ", round(rdaus2$pv[1], 2))

protest_desc_plot1 = 
  ccc_df_aus %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blake = ifelse(date >= as.Date("2020-08-23"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-08-23"),
             size = .2) + 
  geom_point(aes(x = date, y = count),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = count, group = blake),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdauslab1,
           x = as.Date("2020-03-15"),
           y = 2.5,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Protest Count",
       title = "A. Austin (BLM Protest Count)") + 
  theme_tufte()  

protest_desc_plot2 = 
  ccc_df_aus %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blake = ifelse(date >= as.Date("2020-08-23"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-08-23"),
             size = .2) + 
  geom_point(aes(x = date, y = EstimateLow),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = EstimateLow, group = blake),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdauslab2,
           x = as.Date("2020-03-15"),
           y = 1000,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Crowd Size",
       title = "B. Austin (BLM Crowd Size)") + 
  theme_tufte()  

ccc_df_la_rd = ccc_df_la %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-28"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-28")) %>% 
  mutate(blake = ifelse(date >= as.Date('2020-08-23'), 1, 0)) %>% 
  mutate(blakerv = date - as.Date("2020-08-23"))

rdla1 = with(ccc_df_la_rd, rdrobust(x = blakerv,
                                    y = count, kernel = 'uni', p = 1))
rdla2 = with(ccc_df_la_rd, rdrobust(x = blakerv,
                                    y = EstimateLow, kernel = 'uni', p = 1))
rdlalab1 = paste0("RD Est: ", round(rdla1$coef[1], 2), "\nSE: ",
                  round(rdla1$se[1], 2),
                  "\n p < 0.01")
rdlalab2 = paste0("RD Est: ", round(rdla2$coef[1], 2), "\nSE: ",
                  round(rdla2$se[1], 2), 
                  "\n p = ", round(rdla2$pv[1], 2))

protest_desc_plot3 = 
  ccc_df_la %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blake = ifelse(date >= as.Date("2020-08-23"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-08-23"),
             size = .2) + 
  geom_point(aes(x = date, y = count),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = count, group = blake),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdlalab1,
           x = as.Date("2020-03-15"),
           y = 8,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Protest Count",
       title = "C. Los Angeles (BLM Protest Count)") + 
  theme_tufte()  

protest_desc_plot4 = 
  ccc_df_la %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blake = ifelse(date >= as.Date("2020-08-23"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-08-23"),
             size = .2) + 
  geom_point(aes(x = date, y = EstimateLow),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = EstimateLow, group = blake),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdlalab2,
           x = as.Date("2020-03-15"),
           y = 12000,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Crowd Size",
       title = "D. Los Angeles (BLM Crowd Size)") + 
  theme_tufte()  

ccc_df_phl_rd = ccc_df_phl %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-30"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-30")) %>% 
  mutate(blake = ifelse(date >= as.Date('2020-08-23'), 1, 0)) %>% 
  mutate(blakerv = date - as.Date("2020-08-23"))


rdphl1 = with(ccc_df_phl_rd, rdrobust(x = blakerv,
                                      y = count, kernel = 'uni', p = 1))
rdphl2 = with(ccc_df_phl_rd, rdrobust(x = blakerv,
                                      y = EstimateLow, kernel = 'uni', p = 1))
rdphllab1 = paste0("RD Est: ", round(rdphl1$coef[1], 2), "\nSE: ",
                   round(rdphl1$se[1], 2), 
                   "\n p = ", round(rdphl1$pv[1], 2))
rdphllab2 = paste0("RD Est: ", round(rdphl2$coef[1], 2), "\nSE: ",
                   round(rdphl2$se[1], 2), 
                   "\n p = ", round(rdphl2$pv[1], 2))

protest_desc_plot5 = 
  ccc_df_phl %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blake = ifelse(date >= as.Date("2020-08-23"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-08-23"),
             size = .2) + 
  geom_point(aes(x = date, y = count),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = count, group = blake),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdphllab1,
           x = as.Date("2020-03-15"),
           y = 5,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Protest Count",
       title = "E. Philadelphia (BLM Protest Count)") + 
  theme_tufte()  

protest_desc_plot6 = 
  ccc_df_phl %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blake = ifelse(date >= as.Date("2020-08-23"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-08-23"),
             size = .2) + 
  geom_point(aes(x = date, y = EstimateLow),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = EstimateLow, group = blake),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdphllab2,
           x = as.Date("2020-03-15"),
           y = 5000,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Crowd Size",
       title = "F. Philadelphia (BLM Crowd Size)") + 
  theme_tufte()  

ccc_df_sea_rd = 
  ccc_df_sea %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(blmrv = date - as.Date("2020-05-29")) %>% 
  mutate(blake = ifelse(date >= as.Date('2020-08-23'), 1, 0)) %>% 
  mutate(blakerv = date - as.Date("2020-08-23"))


rdsea1 = with(ccc_df_sea_rd, rdrobust(x = blakerv,
                                      y = count, kernel = 'uni', p = 1))
rdsea2 = with(ccc_df_sea_rd, rdrobust(x = blakerv,
                                      y = EstimateLow, kernel = 'uni', p = 1))
rdsealab1 = paste0("RD Est: ", round(rdsea1$coef[1], 2), "\nSE: ",
                   round(rdsea1$se[1], 2), 
                   "\n p = ", round(rdsea1$pv[1], 2))
rdsealab2 = paste0("RD Est: ", round(rdsea2$coef[1], 2), "\nSE: ",
                   round(rdsea2$se[1], 2), 
                   "\n p = ", round(rdsea2$pv[1], 2))

protest_desc_plot7 = 
  ccc_df_sea %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blake = ifelse(date >= as.Date("2020-08-23"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-08-23"),
             size = .2) + 
  geom_point(aes(x = date, y = count),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = count, group = blake),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdsealab1,
           x = as.Date("2020-03-15"),
           y = 4,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Protest Count",
       title = "G. Seattle (BLM Protest Count)") + 
  theme_tufte()  

protest_desc_plot8 = 
  ccc_df_sea %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count = ifelse(is.na(count), 0, count)) %>% 
  mutate(EstimateLow = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(blake = ifelse(date >= as.Date("2020-08-23"), 1, 0)) %>% 
  ggplot() + 
  geom_vline(xintercept = as.Date("2020-08-23"),
             size = .2) + 
  geom_point(aes(x = date, y = EstimateLow),
             size = .2,
             alpha = .4) + 
  geom_smooth(aes(x = date, y = EstimateLow, group = blake),
              col = "black",
              size = .3) + 
  annotate("text",
           label = rdsealab2,
           x = as.Date("2020-03-15"),
           y = 40000,
           family = 'serif',
           size = 2.5) + 
  labs(x = "Date", y = "Crowd Size",
       title = "H. Seattle (BLM Crowd Size)") + 
  theme_tufte()  

protest_desc_plot_grob = 
  arrangeGrob(protest_desc_plot1, protest_desc_plot2,
              protest_desc_plot3, protest_desc_plot4,
              protest_desc_plot5, protest_desc_plot6,
              protest_desc_plot7, protest_desc_plot8,
              ncol = 2)

ggsave(plot = protest_desc_plot_grob, 
       filename = "pics/protest_by_city_blake.jpeg", dpi = 600,
       width = 8, height = 7)

#### loading in phl data: rr ####

# loading in data

load(file = "data/ph_stops_ped.RData")
load(file = "data/ph_stops_veh.RData")

# rate ratio (pedestrian)

phl_rr_ped = stops_ped %>% 
  group_by(date) %>% 
  summarize(r_blk = sum(r_blk),
            r_lat = sum(r_lat),
            r_wht = sum(r_wht),
            blm = mean(blm)) %>% 
  mutate(r_blk_rate = (r_blk / 614254),
         r_wht_rate = (r_wht / 550102),
         r_lat_rate = (r_lat / 238965)) %>% 
  mutate(ped_rr_bw = r_blk_rate / r_wht_rate,
         ped_rr_lw = r_lat_rate / r_wht_rate) %>% 
  mutate(ped_rr_bw = ifelse(ped_rr_bw == Inf | ped_rr_bw == -Inf, NA, ped_rr_bw),
         ped_rr_lw = ifelse(ped_rr_lw == Inf | ped_rr_lw == -Inf, NA, ped_rr_lw))

phl_rr_ped$blmrv = 
  phl_rr_ped$date - min(phl_rr_ped$date[phl_rr_ped$blm == 1])

# rate ratio (vehicle)

phl_rr_veh = stops_veh %>% 
  group_by(date) %>% 
  summarize(r_blk = sum(r_blk),
            r_lat = sum(r_lat),
            r_wht = sum(r_wht),
            blm = mean(blm)) %>% 
  mutate(r_blk_rate = (r_blk / 614254),
         r_wht_rate = (r_wht / 550102),
         r_lat_rate = (r_lat / 238965)) %>% 
  mutate(veh_rr_bw = r_blk_rate / r_wht_rate,
         veh_rr_lw = r_lat_rate / r_wht_rate) %>% 
  mutate(veh_rr_bw = ifelse(veh_rr_bw == Inf | veh_rr_bw == -Inf, NA, veh_rr_bw),
         veh_rr_lw = ifelse(veh_rr_lw == Inf | veh_rr_lw == -Inf, NA, veh_rr_lw))

phl_rr_veh$blmrv = 
  phl_rr_veh$date - min(phl_rr_veh$date[phl_rr_veh$blm == 1])

phl_rr_veh = 
  phl_covid %>% 
  dplyr::select(date, count) %>% 
  rename(covid_cases = count) %>% 
  merge(phl_rr_veh, ., by = 'date', all.x = TRUE) %>% 
  mutate(covid_cases = ifelse(is.na(covid_cases), 0, covid_cases)) %>% 
  mutate(covid_cases = log(covid_cases + 1))

#### loading in sea data: rr ####

load("data/terry.RData")

terry = terry %>% 
  mutate(blm = ifelse(date >= as.Date("2020-05-29"), 1, 0)) %>% 
  mutate(initiate_off = ifelse(call_type == "-" | call_type == "ONVIEW" | 
                                 call_type == "SCHEDULED EVENT (RECURRING)" | 
                                 call_type == "ALARM CALL (NOT POLICE ALARM)", 1, 0),
         initiate_civ = 
           ifelse(call_type == "911" | 
                    call_type == "TELEPHONE OTHER, NOT 911" | 
                    call_type == "TEXT MESSAGE", 1, 0))

sea_rr = 
  terry %>% 
  mutate(count = 1,
         r_unk = ifelse(subject_race == "Unknown" | subject_race == "-", 1, 0),
         r_wht = ifelse(subject_race == "White", 1, 0),
         r_asn = ifelse(subject_race == "Asian", 1, 0),
         r_blk = ifelse(subject_race == "Black or African American", 1, 0),
         r_lat = ifelse(subject_race == "Hispanic", 1, 0)) %>% 
  filter(initiate_off == 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE),
            arrest = mean(arrest, na.rm = TRUE),
            r_lat = sum(r_lat, na.rm = TRUE),
            r_wht = sum(r_wht, na.rm = TRUE),
            r_blk = sum(r_blk, na.rm = TRUE)) %>% 
  filter(date >= as.Date("2019-06-01")) %>% 
  mutate(wht_rate = (r_wht / (.595 * 737015)) * 10000,
         blk_rate = (r_blk / (.068 * 737015)) * 10000,
         lat_rate = (r_lat / (.082 * 737015)) * 10000) %>% 
  mutate(rr_bw = blk_rate / wht_rate) %>% 
  mutate(rr_lw = lat_rate / wht_rate) %>%
  mutate(rr_bw = ifelse(rr_bw == Inf | is.na(rr_bw), NA, rr_bw)) %>% 
  mutate(rr_lw = ifelse(rr_lw == Inf | is.na(rr_lw), NA, rr_lw)) 

sea_rr$blmrv = 
  sea_rr$date - min(sea_rr$date[sea_rr$blm == 1])


sea_rr = merge(sea_rr, sea_covid %>% dplyr::select(date, case_count), by = 'date')
sea_rr$case_count = log(sea_rr$case_count + 1)

#### assessing effect of BLM protest on rr conditional on protest activity #### 

sea_rr = 
  ccc_df_sea %>% 
  group_by(date) %>% 
  summarize(count_protest = sum(count, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count_protest = ifelse(is.na(count_protest), 0, count_protest)) %>% 
  mutate(crowd_size = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(lcrowd_size = log(crowd_size + 1)) %>% 
  merge(., sea_rr, by = "date")

phl_rr_veh = 
  ccc_df_phl %>% 
  group_by(date) %>% 
  summarize(count_protest = sum(count, na.rm = TRUE),
            EstimateLow = sum(EstimateLow, na.rm = TRUE)) %>% 
  merge(., df_date, by = "date", all.y = TRUE) %>% 
  mutate(count_protest = ifelse(is.na(count_protest), 0, count_protest)) %>% 
  mutate(crowd_size = ifelse(is.na(EstimateLow), 0, EstimateLow)) %>% 
  mutate(lcrowd_size = log(crowd_size + 1)) %>% 
  merge(., phl_rr_veh, by = "date")

sea_rr$quarter_fe = 
  ifelse(as.numeric(substring(sea_rr$date, 6, 7)) <= 3, 1,
         ifelse(as.numeric(substring(sea_rr$date, 6, 7)) > 3 & 
                  as.numeric(substring(sea_rr$date, 6, 7)) <= 6, 2,
                ifelse(as.numeric(substring(sea_rr$date, 6, 7)) > 7 & 
                         as.numeric(substring(sea_rr$date, 6, 7)) <= 9, 3, 4)))

phl_rr_veh$quarter_fe = 
  ifelse(as.numeric(substring(phl_rr_veh$date, 6, 7)) <= 3, 1,
         ifelse(as.numeric(substring(phl_rr_veh$date, 6, 7)) > 3 & 
                  as.numeric(substring(phl_rr_veh$date, 6, 7)) <= 6, 2,
                ifelse(as.numeric(substring(phl_rr_veh$date, 6, 7)) > 7 & 
                         as.numeric(substring(phl_rr_veh$date, 6, 7)) <= 9, 3, 4)))


rrseam1 = with(sea_rr, lm_robust(rr_bw ~ blm * blmrv))
rrseam2 = with(sea_rr, lm_robust(rr_bw ~ blm * blmrv + count_protest + case_count))
rrseam3 = with(sea_rr, lm_robust(rr_bw ~ blm * blmrv + lcrowd_size + case_count))
rrseam4 = with(sea_rr, lm_robust(rr_bw ~ blm + count_protest + case_count, fixed_effects = ~ quarter_fe))
rrseam5 = with(sea_rr, lm_robust(rr_bw ~ blm + lcrowd_size + case_count, fixed_effects = ~ quarter_fe))

texreg::texreg(l = list(rrseam1, rrseam2, rrseam3, rrseam4, rrseam5),
               booktabs = TRUE,
               include.ci = FALSE,
               include.rmse = FALSE,
               include.adjrs = FALSE,
               caption = "\\textbf{The onset of the BLM protest reduces the Black/white stop rate ratio net of the intensity of protest activity (Seattle)}",
               custom.model.names = paste0("(", seq(from = 1, to = 5), ")"),
               caption.above = TRUE,
               custom.coef.map = list("blm" = "BLM Protest",
                                      "count_protest" = "Protest Count",
                                      "lcrowd_size" = "Log(Crowd Size + 1)"),
               custom.gof.rows = list('RD Effect?' = c("Y", "Y", "Y", "N", "N"),
                                      'DIM Effect?' = c("N", "N", "N", "Y", "Y"),
                                      "Quarter FE" = c("N", "N", "N", "Y", "Y")),
               label = "table:rrseabw")

rrphlm1 = with(phl_rr_veh, lm_robust(veh_rr_bw ~ blm * blmrv))
rrphlm2 = with(phl_rr_veh, lm_robust(veh_rr_bw ~ blm * blmrv + count_protest + covid_cases))
rrphlm3 = with(phl_rr_veh, lm_robust(veh_rr_bw ~ blm * blmrv + lcrowd_size + covid_cases))
rrphlm4 = with(phl_rr_veh, lm_robust(veh_rr_bw ~ blm + count_protest + covid_cases, fixed_effects = ~ quarter_fe))
rrphlm5 = with(phl_rr_veh, lm_robust(veh_rr_bw ~ blm + lcrowd_size + covid_cases, fixed_effects = ~ quarter_fe))

texreg::texreg(l = list(rrseam1, rrseam2, rrseam3, rrseam4, rrseam5),
               booktabs = TRUE,
               include.ci = FALSE,
               include.rmse = FALSE,
               include.adjrs = FALSE,
               caption = "\\textbf{The onset of the BLM protest reduces the Black/white stop rate ratio net of the intensity of protest activity (Seattle)}",
               custom.model.names = paste0("(", seq(from = 1, to = 5), ")"),
               caption.above = TRUE,
               custom.coef.map = list("blm" = "BLM Protest",
                                      "count_protest" = "Protest Count",
                                      "lcrowd_size" = "Log(Crowd Size + 1)"),
               custom.gof.rows = list('RD Effect?' = c("Y", "Y", "Y", "N", "N"),
                                      'DIM Effect?' = c("N", "N", "N", "Y", "Y"),
                                      "Quarter FE" = c("N", "N", "N", "Y", "Y")),
               label = "table:rrseabw")

texreg::texreg(l = list(rrphlm1, rrphlm2, rrphlm3, rrphlm4, rrphlm5),
               booktabs = TRUE,
               include.ci = FALSE,
               include.rmse = FALSE,
               include.adjrs = FALSE,
               caption = "\\textbf{The onset of the BLM protest reduces the Black/white stop rate ratio net of the intensity of protest activity (Philadelphia)}",
               custom.model.names = paste0("(", seq(from = 1, to = 5), ")"),
               caption.above = TRUE,
               custom.coef.map = list("blm" = "BLM Protest",
                                      "count_protest" = "Protest Count",
                                      "lcrowd_size" = "Log(Crowd Size + 1)"),
               custom.gof.rows = list('RD Effect?' = c("Y", "Y", "Y", "N", "N"),
                                      'DIM Effect?' = c("N", "N", "N", "Y", "Y"),
                                      "Quarter FE" = c("N", "N", "N", "Y", "Y")),
               label = "table:rrphlbw")
